File name: 2 RSSI Analysis v5 seaborn & plotly
Find the relationship between the RSSI and the number of measurements for each base station.
import datetime
# Record when this notebook was run so that saved output can be dated.
current_datetime = datetime.datetime.now()
print(
    "This .ipynb last executed at {dt}".format(dt=current_datetime)
)
# --- Data handling and static plotting ---------------------------------------
import os #Used only to check the working path; not referenced in the visible code
import pandas as pd
pd.set_option('display.max_columns', None) #Display ALL columns of a DataFrame instead of truncating them
import seaborn as sb
import numpy as np
import matplotlib.pyplot as plt # only the pyplot interface is needed
sb.set() # apply the default Seaborn style to every figure drawn afterwards
# --- Interactive plotting (plotly) -------------------------------------------
#Import the most common plotly libraries
import plotly.offline as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly import tools
# Activate inline plotly rendering inside the notebook
py.init_notebook_mode(connected = False)
# Load the cleaned measurement workbook. The first spreadsheet column becomes
# the DataFrame index (index_col=0).
fileName = 'cleaned_nCr_rssi_tst_loc2_04_3E81CB_20200316'
excelData = pd.read_excel(fileName+'.xlsx', index_col=0)
# CSV alternative for the same data set:
# excelData = pd.read_csv(fileName+'.csv', index_col=0)

# Check the data types of all columns.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
excelData.info()
print("\n\n==============================================================================================")

# If a column has to be treated as text, convert it explicitly, e.g.
# excelData = excelData.assign(station=excelData['station'].astype(str))
excelData.head()
# Station-ordered working copy. sort_values() returns a new DataFrame, so
# excelData itself keeps its original row order.
sortedData = excelData.sort_values(by='station')
# Preview the first rows of the sorted table.
sortedData.head()
# Keep the rows whose 'note' mentions 'rssi' (the RSSI measurements); rows
# holding LLS results are left out.
isRssiRow = sortedData['note'].str.contains('rssi')
df = sortedData[isRssiRow]
del isRssiRow

# Number of measurements received by each base station, ordered by station ID.
print('BS ID\t\t' + 'count of measurements')
print('____________________________')
print(df['station'].value_counts().sort_index(ascending=True))

# Distinct base-station IDs, in order of first appearance in the sorted data.
bsId = df['station'].unique()
numbOfBS = len(bsId)

print('\n=== Unique BS ID in this location =============')
print(bsId)
print('@type: ' + str(type(bsId)))
print('@numb of BS: ' + str(len(bsId)))

print('\n=== testing =============')
# NOTE(review): [0] is a label lookup on the index read from the first
# spreadsheet column, not "the first row after sorting" - confirm the intent.
print(sortedData['station'][0])
# RSSI measurement rows only (rows holding LLS results are excluded).
allData = sortedData[sortedData['note'].str.contains('rssi')].copy()

# Map every base-station ID to the rows recorded by exactly that station.
# Equality is used instead of str.contains(): contains() performs a
# regex/substring match, so a station whose ID is contained in another ID
# would also collect the other station's rows.
rssiByBsId = {item: allData[allData['station'] == item] for item in bsId}

# Preview the measurements of the first station.
rssiByBsId[bsId[0]]
Prepare the data for the plot of "RSSI vs. number of measurements".
# print(len(rssiByBsId[bsId[2]]))
# for var in rssiByBsId:
# print({var: len(rssiByBsId[var])})
# index_list = []
# for index in range(len(rssiByBsId[var])):
# index_list.append(index+1)
# print(index_list)
# rssiByBsId[var].insert(loc=0, column='index', value=index_list)
# # print(rssiByBsId[var])
# rssiByBsId[bsId[3]]
import datetime
# Time span covered by the RSSI measurements: first row versus last row.
df = excelData[excelData['note']=='rssi'].copy()

# 'time' is interpreted as a POSIX timestamp (it is fed to fromtimestamp()).
timestamp_1 = df['time'].values[0]   # first row (1st Sigfox message by seqNumber)
timestamp_2 = df['time'].values[-1]  # last row (last Sigfox message by seqNumber)
dt_obj_1, dt_obj_2 = (
    datetime.datetime.fromtimestamp(int(stamp))
    for stamp in (timestamp_1, timestamp_2)
)
dt_diff = abs(dt_obj_1 - dt_obj_2)

print('1) first datetime\t{0}'.format(dt_obj_1))
print('2) last datetime\t{0}'.format(dt_obj_2))
print('3) time difference\t{0}'.format(dt_diff))

# Number of distinct messages, and the GPS position stored in the first row.
numbMeasures = df['seqNumber'].nunique(dropna=False)
gpsLat = df['DeviceGPSLat'].values[0]
gpsLng = df['DeviceGPSLng'].values[0]
print('\nTime for {0} measurements at GPS({1}, {2})\n {3} (hr:min:s)'.format(numbMeasures,gpsLat,gpsLng,dt_diff))
Step 1. Get the time difference between each pair of consecutive measurements.
Step 2. Compute summary statistics (including the mean) of these time differences.
import datetime
# Statistics of the time gap between consecutive Sigfox messages.
# Keep only the RSSI measurement rows (rows holding LLS results are excluded).
df = excelData.copy()
df = df[df['note'].str.contains('rssi')]
seqNumber = df['seqNumber'].unique()

# One timestamp per message: the 'time' of the first row seen for each
# seqNumber, in order of first appearance (the same order unique() yields).
# A single pass replaces re-filtering the whole frame for every seqNumber.
firstRows = df.drop_duplicates(subset='seqNumber', keep='first')
epochSeconds = [int(stamp) for stamp in firstRows['time'].values]

# Gap between each message and the next one. The difference is taken on the
# raw epoch seconds, so it is true elapsed time and is not shifted by
# local-time changes (e.g. daylight saving) between two measurements.
numList = [
    datetime.timedelta(seconds=abs(later - earlier))
    for earlier, later in zip(epochSeconds, epochSeconds[1:])
]

numList = pd.DataFrame(numList, columns=['Statistic of the Time of Measurements (D H:M:S.s)'])
numList.describe()
# Side-by-side summary statistics of the RSSI of every base station.
# Each station contributes one column named '<station> rssi'. The per-station
# index is reset so that the columns align by measurement position, and
# shorter columns are padded with NaN.
rssiColumns = [
    stationRows['rssi'].reset_index(drop=True).rename('{} rssi'.format(station))
    for station, stationRows in rssiByBsId.items()
]

# Build the table with a single concat instead of growing it inside the loop.
numDF = pd.concat(rssiColumns, axis=1, sort=False) if rssiColumns else pd.DataFrame()
print(numDF.describe())
def singleDistplot(df, binsNumb=15, bs=""):
    """Plot the RSSI histogram with a KDE curve for one base station.

    Args:
        df: DataFrame with an 'rssi' column; its row count is shown in the
            plot title.
        binsNumb: Number of histogram bins.
        bs: Base-station ID shown in the plot title.

    The figure is displayed with plt.show(); nothing is returned.
    """
    bsCount = str(len(df))
    sb.set(font_scale=2, style='ticks')
    f, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))

    # NOTE(review): distplot is deprecated in recent seaborn releases
    # (histplot/displot replace it) - confirm the installed version before
    # migrating, since the replacement takes different keyword arguments.
    sb.distplot(
        df['rssi'], hist=True, kde=True, bins=binsNumb, norm_hist=False,
        hist_kws={'edgecolor': 'black', 'label': 'measurement'},
        kde_kws={'linewidth': 5, 'color': 'red', "label": "PDF"},
        ax=axes,
    )

    # Draw on the axes created above rather than on whatever pyplot currently
    # considers the "current" axes.
    axes.set_title('BS: {} (count= {})'.format(bs, bsCount), y=1.1)
    axes.set_xlabel('RSSI (dBm)')
    axes.set_ylabel('Density')
    plt.show()
# Station to inspect. Behaviour observed per station in this data set:
#   normal:    6C6B 8043
#   strange:   7A06 7A4E 8042 8041
#   strangest: 7C6B 7F0F
bs = '7F0F'
# Plot from a copy so that the stored per-station frame stays untouched.
df = rssiByBsId[bs].copy()
singleDistplot(df, binsNumb=15, bs=bs)
print('no. of BS: '+str(len(rssiByBsId)))

# One row of three panels (box plot, distribution plot, violin plot) per
# base station.
# Font-size handling follows:
# https://stackoverflow.com/questions/25328003/how-can-i-change-the-font-size-using-seaborn-facetgrid
sb.set(font_scale=1, style='ticks')
nrows = len(rssiByBsId)
ncols = 3
# squeeze=False keeps axes1 two-dimensional even when there is only one
# station; otherwise axes1[row, col] raises IndexError for a single row.
fig1, axes1 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*5, nrows*2.7), squeeze=False)
fig1.suptitle('Distribution of RSSI of Each Station', fontsize=14, y=1)
colors = ["r", "g", "b", "m", "c"]

print('=== The Univariate Distribution of RSSI of each base station ===')
print('y-axis is normalized. It shows a density rather than a count. This is implied if a KDE or fitted density is plotted.')

for count_ax, (var, df) in enumerate(rssiByBsId.items()):
    # Cycle through the palette when there are more stations than colours.
    colour = colors[count_ax % len(colors)]
    title = 'BS: '+var+ ' (count= '+str(len(df))+')'
    sb.boxplot(df['rssi'], orient = "h", color = colour, ax = axes1[count_ax,0]).set_title(title)
    sb.distplot(df['rssi'], color = colour, ax = axes1[count_ax,1]).set_title(title)
    sb.violinplot(df['rssi'], color = colour, ax = axes1[count_ax,2]).set_title(title)

fig1.tight_layout()
Plot a box plot of the RSSI of every station, arranged side by side in a single figure.
# sb.set(font_scale=1.1, style="whitegrid")
# f, axes = plt.subplots(nrows=1, ncols=1, figsize=(18, 14))
# sb.boxplot(x = 'rssi', y = 'station', data = allData)
# # Add in points to show each observation
# sb.swarmplot(x="rssi", y="station", data=allData, size=3, color=".4", linewidth=0).set_title('Distribution of RSSI of Each Station', fontsize=38)
# All stations in one wide figure: a box plot per station with the individual
# observations drawn on top.
sb.set(font_scale=1, style='whitegrid')
f, axes = plt.subplots(nrows=1, ncols=1, figsize=(40, 11))

# Box plot of the RSSI grouped by station
boxAxes = sb.boxplot(x='station', y='rssi', data=allData)
boxAxes.set_title('Distribution of RSSI of Each Station', fontsize=38)
del boxAxes

# Overlay one point per observation
sb.swarmplot(x="station", y="rssi", data=allData, size=3, color=".4", linewidth=0)
Find the relationship between the RSSI and the number of measurements (seqNumber) of each station.
# # Test - Correlation Matrix
# df = rssiByBsId[bsId[1]][['rssi','seqNumber']]
# print('Test of correlation matrix')
# print(df.corr())
# # Heatmap of the Correlation Matrix
# print('The correlation matrix of RSSI')
# sb.set(font_scale=4)
# ncols = 6
# nrows = int(np.ceil((len(rssiByBsId)/ncols)))
# fig2, axes2 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*10, nrows*8))
# fig2.suptitle('Distribution of RSSI of Each Station\n(RSSI vs Number of measurements)', fontsize=80, y=1.03)
# count_ax_row = 0
# count_ax_col = 0
# for var in rssiByBsId:
# df = rssiByBsId[var]
# bsCount = str(len(df))
# # sb.boxplot(rssiByBsId[var]['rssi'], orient = "h", color = colors[count_colour], ax = axes[count_ax,0]).set_title(var+ ' (#'+bsCount+')')
# sb.heatmap(df[['rssi','seqNumber']].corr(), vmin = -1, vmax = 1, linewidths = 1,
# annot = True, fmt = ".2f", annot_kws = {"size": 50}, cmap = "RdBu", ax=axes2[count_ax_row,count_ax_col]).set_title(var+ ' (#'+bsCount+')')
# count_ax_col += 1
# if count_ax_col >= ncols:
# count_ax_row += 1
# count_ax_col = 0
# print(count_ax_row)
# fig2.tight_layout()
Find the correlation between the RSSI and the number of measurements of each station. (Highlight a plot if the correlation between the RSSI and the number of measurements exceeds a threshold.)
# sb.set(font_scale=4)
# # f3, axes3 = plt.subplots(2, 8, figsize=(100, 30))
# # sb.scatterplot(x = "index", y = "rssi", data = rssiByBsId[bsId[2]], ax=axes3[1,2], s=1000, color='red').set_title('var'+ ' (#'+bsCount+')')
# ncols = 6
# nrows = int(np.ceil((len(rssiByBsId)/ncols)))
# f3, axes3 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*12, nrows*10))
# f3.suptitle('Distribution of RSSI of Each Station\n(RSSI vs Number of measurements)', fontsize=80, y=1.03)
# colors = ["r", "g", "b", "m", "c"]
# count_ax_col = 0
# count_ax_row = 0
# count_colour = 0
# for var in rssiByBsId:
# df = rssiByBsId[var]
# bsCount = str(len(df))
# corr = str(format(df['rssi'].corr(df['seqNumber']),'.2f'))
# # print(corr)
# sb.scatterplot(x = "seqNumber", y = "rssi", data = df, ax=axes3[count_ax_row,count_ax_col], s=1000, color=colors[count_colour]).set_title(var+'\n(#'+bsCount+')'+'(corr:'+corr+')')
# sb.regplot(x = "seqNumber", y = "rssi", data = df, ax=axes3[count_ax_row,count_ax_col],
# x_ci="sd", line_kws={"linewidth": 5},color=colors[count_colour-2]).set_title(var+'\n(#'+bsCount+')'+'(corr:'+corr+')')
# # sb.violinplot(rssiByBsId[var]['rssi'], color = colors[count_colour], ax = axes1[count_ax,2]).set_title(var+ ' (#'+bsCount+')')
# # print(count_ax)
# count_ax_col += 1
# count_colour += 1
# if count_ax_col >= ncols:
# count_ax_row += 1
# count_ax_col = 0
# print(count_ax_row)
# if (count_colour > (len(colors)-1)):
# count_colour = 0
# # f3.suptitle('Horizontally stacked subplots')
# f3.tight_layout()
def _with_iteration(df):
    """Return df with an 'Iteration' column, adding a 1-based row counter if it is missing."""
    if 'Iteration' in df.columns:
        return df
    # assign() returns a new frame, so the caller's DataFrame is not modified.
    return df.assign(Iteration=list(range(1, len(df) + 1)))


def singleBSRSSI(df1,df2,bs1,bs2):
    """Plot the RSSI of two base stations against the iteration number.

    Args:
        df1: Measurements of the first station; needs an 'rssi' column.
        df2: Measurements of the second station; needs an 'rssi' column.
        bs1: ID of the first station (legend and title).
        bs2: ID of the second station (legend and title).

    If a frame has no 'Iteration' column, its rows are numbered 1..n in their
    current order and that numbering is used as the x axis, so the per-station
    frames can be passed in directly. The figure is displayed with plt.show();
    nothing is returned.
    """
    bsCount1 = str(len(df1))
    bsCount2 = str(len(df2))
    df1 = _with_iteration(df1)
    df2 = _with_iteration(df2)

    sb.set(font_scale=2, style='darkgrid')
    f, axes = plt.subplots(nrows=1, ncols=1, figsize=(20,6))

    # Each station is drawn as a line plus a marker for every measurement.
    for data, colour, station in ((df1, 'green', bs1), (df2, 'purple', bs2)):
        sb.lineplot(x="Iteration", y="rssi", data=data, markers=True, linewidth=3,
                    color=colour, label='BS: '+station, ax=axes)
        sb.scatterplot(x="Iteration", y="rssi", data=data, s=100, color=colour, ax=axes)

    axes.set_title('BS: '+bs1 + ' (count= '+bsCount1+')'+' & '+bs2 +' (count= '+bsCount2+')',y=1.1)
    axes.set_xlabel('Iterations')
    axes.set_ylabel('RSSI (dBm)')
    plt.show()
# Compare one "normal" station with one "strange" station.
#   normal:  6C6B 8043
#   strange: 7A06 7A4E 8042 7C6B 7F0F 8041
bs1, bs2 = '6C6B', '7C6B'
# Work on copies so that the stored per-station frames stay untouched.
df1, df2 = rssiByBsId[bs1].copy(), rssiByBsId[bs2].copy()
singleBSRSSI(df1, df2, bs1, bs2)
# Grid of line plots, one panel per base station: RSSI against seqNumber,
# with the correlation between the two shown in the panel title.
sb.set(font_scale=4)
ncols = 6
nrows = int(np.ceil((len(rssiByBsId)/ncols)))
# squeeze=False keeps axes3 two-dimensional even when all stations fit in a
# single row (six or fewer); otherwise axes3[row, col] raises IndexError.
f3, axes3 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*12, nrows*10), squeeze=False)
f3.suptitle('Distribution of RSSI of Each Station\n(RSSI vs Number of measurements)', fontsize=80, y=1.03)
colors = ["r", "g", "b", "m", "c"]

for position, (var, df) in enumerate(rssiByBsId.items()):
    # Fill the grid row by row.
    count_ax_row, count_ax_col = divmod(position, ncols)
    # Cycle through the palette when there are more stations than colours.
    count_colour = position % len(colors)
    bsCount = str(len(df))
    corr = format(df['rssi'].corr(df['seqNumber']), '.2f')
    sb.lineplot(x="seqNumber", y="rssi", data=df, linewidth=7,
                ax=axes3[count_ax_row,count_ax_col],
                color=colors[count_colour]).set_title(var+'\n(#'+bsCount+')'+'(corr:'+corr+')')
    # Progress output: number of grid rows completed so far.
    if count_ax_col == ncols - 1:
        print(count_ax_row + 1)

f3.tight_layout()
Plot "accuracy of RSSI vs. accuracy of the LLS-estimated location".
Accuracy of RSSI = difference between the expected RSSI (based on the real location) and the measured RSSI.
Accuracy of LLS = localization error.
# #Get "Accuracy of RSSI" and prepare the dataframe for plotting
# df_plot = sortedData[sortedData['note']=='rssi'] #Only RSSI measurements are needed
# df_plot = df_plot[['seqNumber','station','rssi','ExpectedRSSI','LocalizationError']]
# df_plot['LE'] = df_plot['LocalizationError']
# df_plot['ExpRSSI-rssi'] = (df_plot['ExpectedRSSI'] - df_plot['rssi'])#.abs()
# df_plot['rssi-ExpRSSI'] = (df_plot['rssi'] - df_plot['ExpectedRSSI'])#.abs()
# # Test - Correlation Matrix
# df_test = df_plot[df_plot['station']==bsId[2]].sort_values(['seqNumber'])
# print('Test of correlation matrix')
# print(df_test[['rssi-ExpRSSI','LE']].corr())
# # Heatmap of the Correlation Matrix
# print('The correlation matrix of RSSI')
# sb.set(font_scale=3)
# ncols = 6
# nrows = int(np.ceil((len(rssiByBsId)/ncols)))
# fig2, axes2 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*10, nrows*8))
# fig2.suptitle('Accuracy of RSSI vs Localization Error', fontsize=120, y=1.03)
# count_ax_row = 0
# count_ax_col = 0
# for var in bsId:
# df = df_plot[df_plot['station']==var]
# # print(df)
# bsCount = str(len(df))
# # sb.boxplot(rssiByBsId[var]['rssi'], orient = "h", color = colors[count_colour], ax = axes[count_ax,0]).set_title(var+ ' (#'+bsCount+')')
# sb.heatmap(df[['LE','rssi-ExpRSSI']].corr(), vmin = -1, vmax = 1, linewidths = 1,
# annot = True, fmt = ".2f", annot_kws = {"size": 50}, cmap = "RdBu", ax=axes2[count_ax_row,count_ax_col]).set_title(var+ ' (#'+bsCount+')')
# count_ax_col += 1
# if count_ax_col >= ncols:
# count_ax_row += 1
# count_ax_col = 0
# print(count_ax_row)
# fig2.tight_layout()
Plot "accuracy of RSSI vs. accuracy of the LLS-estimated location".
Accuracy of RSSI = difference between the expected RSSI (based on the real location) and the measured RSSI.
Accuracy of LLS = localization error.
# sb.set(font_scale=3.5)
# ncols = 6
# nrows = int(np.ceil((len(rssiByBsId)/ncols)))
# f3, axes3 = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*12, nrows*10))
# f3.suptitle('Accuracy of RSSI vs Localization Error', fontsize=120, y=1.03)
# colors = ["r", "g", "b", "m", "c"]
# count_ax_col = 0
# count_ax_row = 0
# count_colour = 0
# for var in bsId:
# df = df_plot[df_plot['station']==var]
# bsCount = str(len(df))
# corr = str(format(df['rssi-ExpRSSI'].corr(df['LE']),'.2f'))
# # print(corr)
# sb.scatterplot(x = "rssi-ExpRSSI", y = "LE", data = df, ax=axes3[count_ax_row,count_ax_col], s=1000, color=colors[count_colour]).set_title(var+'\n(#'+bsCount+')'+'(corr:'+corr+')')
# # sb.violinplot(rssiByBsId[var]['rssi'], color = colors[count_colour], ax = axes1[count_ax,2]).set_title(var+ ' (#'+bsCount+')')
# # print(count_ax)
# count_ax_col += 1
# count_colour += 1
# if count_ax_col >= ncols:
# count_ax_row += 1
# count_ax_col = 0
# print(count_ax_row)
# if (count_colour > (len(colors)-1)):
# count_colour = 0
# # f3.suptitle('Horizontally stacked subplots')
# f3.tight_layout()
# Get the "accuracy of RSSI" and prepare the DataFrame for plotting.
# Only the RSSI measurement rows and the columns needed for the plot are
# kept. The explicit copy() makes df_plot independent of sortedData, so the
# column assignments below do not act on a slice (SettingWithCopyWarning).
df_plot = sortedData.loc[
    sortedData['note']=='rssi',
    ['seqNumber','station','rssi','ExpectedRSSI','LocalizationError'],
].copy()

df_plot['LE'] = df_plot['LocalizationError']
# Signed difference between expected and measured RSSI, in both directions.
df_plot['ExpRSSI-rssi'] = (df_plot['ExpectedRSSI'] - df_plot['rssi'])#.abs()
df_plot['rssi-ExpRSSI'] = (df_plot['rssi'] - df_plot['ExpectedRSSI'])#.abs()
df_plot.sort_values('seqNumber').head()
# Build the location label used in plot titles: the file name without its
# "cleaned_" prefix, followed by the device GPS position.
# NOTE(review): the position is read from the SECOND row (.values[1]) while
# the time-span summary reads the first row - confirm which one is intended.
lat, lng = (
    str(excelData[column].values[1])
    for column in ('DeviceGPSLat', 'DeviceGPSLng')
)
gps = '({},{})'.format(lat,lng)
locName = fileName.replace("cleaned_", "") + gps
print(locName)
# Localization error of every LLS result, with a robust regression trend.
sb.set(font_scale=2)
f4, axes4 = plt.subplots(nrows=1, ncols=1, figsize=(20, 10))
f4.suptitle('Localization Error (meters) vs Iteration\n'+locName, fontsize=30, y=0.97)

# Only the LLS result rows are needed here (note == 'lls'); they are
# renumbered 1..n in an 'Iteration' column.
df_plot = sortedData[sortedData['note']=='lls'].reset_index(drop=True)
df_plot['Iteration'] = df_plot.index+1
df_plot

# Error per message as a red line
sb.lineplot(x="seqNumber", y="LocalizationError", data=df_plot, linewidth=2, color='red')
# Scatter points plus the fitted trend line
sb.regplot(
    x="seqNumber", y="LocalizationError", data=df_plot,
    x_ci='sd', robust=True,
    line_kws={"linewidth": 5},
    scatter_kws={'s':60,'color':'red'},
)
locName
# Report the parameters of the best location estimates.

# Smallest localization error over the whole table, and the path-loss
# parameters of the first row that reaches it.
min_loc_err = sortedData['LocalizationError'].min()
print("min LE:\t",min_loc_err)
df = sortedData[sortedData['LocalizationError']==min_loc_err].head(1)
print('alpha:\t',df['PathLossExponent'].values)
print('ref rssi:',df['ReferenceRSSI'].values)
print()

# Same report for the nCr variant. The minimum is searched in the full
# table again: reusing the single row selected above would merely echo that
# row's nCr error instead of finding the overall nCr minimum.
nCr_min_loc_err = sortedData['nCrLocalizationError'].min()
print("nCr min LE:\t",nCr_min_loc_err)
df = sortedData[sortedData['nCrLocalizationError']==nCr_min_loc_err].head(1)
print('nCr alpha:\t',df['nCrPathLossExponent'].values)
print('nCr ref rssi:\t',df['nCrReferenceRSSI'].values)
print()
print(fileName)